#!/usr/bin/env python3
# -*- coding: utf-8 -*-
##############################################
# @Author: DengLibin 榆霖
# @Date: Create in 2022-03-30 16:27:44
# @Description: 文本特征抽取
##############################################



from sklearn.feature_extraction.text import CountVectorizer


# 单词计数
def count_demo():
    """Demonstrate bag-of-words text feature extraction with CountVectorizer.

    Fits a ``CountVectorizer`` with default settings on three sample
    sentences and prints, in order:

    1. the document-term count matrix returned by ``fit_transform``,
    2. the learned feature names (the vocabulary terms), as a plain list,
    3. the same matrix converted to a dense array via ``toarray()``.

    Returns:
        None. All results are written to standard output.
    """
    data = ['life is short, i like like python', 'life is to too long, i dislike python', '我是 中国 人']
    # Instantiate the transformer and learn the vocabulary while encoding
    # the documents in a single step.
    transfer = CountVectorizer()
    data_new = transfer.fit_transform(data)
    print(data_new)
    # Feature names (the vocabulary words). ``get_feature_names()`` was
    # deprecated in scikit-learn 1.0 and removed in 1.2, so prefer
    # ``get_feature_names_out()`` and only fall back to the legacy method on
    # old installations. ``tolist()`` keeps the printed form a plain Python
    # list, matching what the legacy method produced.
    if hasattr(transfer, 'get_feature_names_out'):
        feature_names = transfer.get_feature_names_out().tolist()
    else:
        feature_names = transfer.get_feature_names()
    print(feature_names)
    print(data_new.toarray())


if __name__ == '__main__':
    # Run the demo only when this file is executed as a script, not when it
    # is imported as a module.
    count_demo()
